/*Creating county-year aggregate banking variables*/

local Data "CountyBanking"

noi disp "Obtaining SOD county data..."
noi disp "Verifying that `Data'.dta does not already exist..."
* This returns an error if the file does not exist.
capture confirm file `Data'.dta

* If an error is returned, then the commands below are run.
if _rc == 601 {
	noi disp "Data not found.  Building data..."

	*******************************************************************************
	****Generating SOD county data for merge***************************************
	*******************************************************************************

	*Pulling in SOD data
	noi disp "Loading SOD data..."
	insheet using "$SourceData\SOD_Data.csv", clear
	
	preserve
		destring asset depsumbr, ignore(",") replace
		sort cert stcntybr year
		collapse (sum) depsumbr (max) asset, by(cert stcntybr year)
		compress
		save "$LocalData\SOD_Data_collapsed.dta", replace
	restore
	
	noi etime
	
	sort year
	
	noi disp "Merging in CPI data..."
	merge m:1 year using "$LocalData\CPI.dta"
		preserve
			insheet using "$SourceData\GDP.csv", clear
			keep gdp date
			gen date2 = date(date,"MDY")
			gen month = month(date2)
			gen year = year(date2)
			keep if month==6
			keep gdp year
			keep if year>2000 & year<2018
			gen temp = gdp if year==2001
			egen gdp2001 = max(temp)
			gen gdp_adj = gdp/gdp2001
			keep gdp_adj year
			sort year
			save  "$LocalData\GDP.dta", replace
		restore
	drop _merge
	merge m:1 year using "$LocalData\GDP.dta"
	
	destring asset depsumbr, ignore(",") replace
	gen assetgdp  = asset/gdp_adj
	replace asset = asset/cpaltt01usa661s

	* Drop if there is no asset data.
	drop if missing(asset)

	* Banks are "big" if they have more than $50 billion in assets"
	gen byte big = asset>50000000
	label var big "Assets > $50 billion"

	* Banks are "small" if they have more than $1 billion in assets"
	gen small = asset<1000000
	gen smallgdp = assetgdp<1000000
	label var small "Assets < $1 billion"
	
	* Banks are "small" if they have more than $1 billion in assets"
	gen mid = asset>=1000000 & asset<50000000
	label var mid "Assets $1 bn-$50 bn"

	* Generating Bank-year and Bank-County-Year deposits and branches
	bysort cert year: egen bank_dep = sum(depsumbr)
	label var bank_dep "Total Bank Deposits"
	bysort cert stcntybr year: egen bank_cnty_dep = sum(depsumbr)
	label var bank_cnty_dep "Total Bank County Deposits"
	bysort cert stcntybr year: egen bank_br = count(depsumbr)
	label var bank_br "Total Bank County Branches"
	
	* Keep only first observation by cert, stcntybr, and year
	duplicates drop cert stcntybr year, force
	
	bysort stcntybr year: egen cnty_dep = sum(bank_cnty_dep)
	label var cnty_dep "Total County Deposits"
	bysort stcntybr year: egen cnty_br = sum(bank_br)
	label var cnty_br "Total County Branches"
	bysort cert stcntybr year: gen obs = _n 
	bysort stcntybr year: egen cnty_bank = sum(obs)
	label var cnty_bank "Total County Banks"
	
	gen bank_county_wght = bank_cnty_dep/bank_dep
	sort cert year
	by cert year: egen total = sum(bank_county_wght)
	
	drop _merge
	
	save "$LocalData\Bank_Dep_Wghts.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
	
	noi disp "Merging in merger data..."
	merge cert year using "$LocalData\merger_data.dta"
	drop if _merge==2

	*Create Variable of small-bank deposits, branches associated with a merger in a county
	gen temp 	= bank_cnty_dep 	 if small==1 & (mclosed==1 | macquiring==1)
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen mdeposit = sum(temp)
	drop temp

	gen temp 	= bank_br	 if small==1 & (mclosed==1 | macquiring==1)
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen mbranch = sum(temp)
	drop temp

	*Create Variable of small-bank deposits, branches associated with a failure in a county
	gen temp 	= bank_cnty_dep 	  if small==1 & mfailed==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen fdeposit = sum(temp)
	drop temp
	
	gen temp 	= bank_br 	  if small==1 & mfailed==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen fbranch = sum(temp)
	drop temp

	*Create Variable of small-bank deposits, branches associated with a bank closure in a county
	gen temp 	= bank_cnty_dep 	  if small==1 & mclosed==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen cdeposit = sum(temp)
	drop temp
	
	gen temp 	= bank_br 	  if small==1 & mclosed==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen cbranch = sum(temp)
	drop temp

	*Create Variable of small-bank deposits, branches associated with an acquirer closure in a county
	gen temp 	= bank_cnty_dep 	  if small==1 & macquiring==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen adeposit = sum(temp)
	drop temp
	
	gen temp 	= bank_br 	  if small==1 & macquiring==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen abranch = sum(temp)
	drop temp

	*Create Dummy variable for HQ merger
	gen temp 	= 1 	  if stcntybr==stcntyHQ & mclosed==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen mHQ = max(temp)
	drop temp		

	*Create Dummy variable for HQ acquiring
	gen temp 	= 1 	  if stcntybr==stcntyHQ & macquiring==1
	replace temp = 0 if missing(temp)
	bysort stcntybr year: egen acqHQ = max(temp)
	drop temp
		
	*Proportion of deposits, branches associated with mergers, failures, closures, acquisitions
	gen merg_dep = mdeposit/cnty_dep
	label var merg_dep "Ratio of merged deposits to total county deposits"
	gen merg_br = mbranch/cnty_br
	label var merg_br "Ratio of merged branches to total county branches"
	gen fail_dep = fdeposit/cnty_dep
	label var fail_dep "Ratio of failed deposits to total county deposits"
	gen fail_br = fbranch/cnty_br
	label var fail_br "Ratio of failed branches to total county branches"
	gen cl_dep = cdeposit/cnty_dep
	label var cl_dep "Ratio of closed deposits to total county deposits"
	gen cl_br = cbranch/cnty_br
	label var cl_br "Ratio of closed branches to total county branches"
	gen acq_dep = adeposit/cnty_dep
	label var acq_dep "Ratio of acquired deposits to total county deposits"
	gen acq_br = abranch/cnty_br
	label var acq_br "Ratio of acquired branches to total county branches"
	
	*Create HHI measure by county
	gen bank_cnty_dep_share2 = (bank_cnty_dep/cnty_dep)^2
	gen bank_br_share2 = (bank_br/cnty_br)^2
	bysort stcntybr year: egen hhi_dep = sum(bank_cnty_dep_share2)
	label var hhi_dep "Deposit HHI"
	bysort stcntybr year: egen hhi_br = sum(bank_br_share2)
	label var hhi_dep "Branch HHI"
		
	*Create Large deposit share by county
	gen temp = bank_cnty_dep if big==1
	bysort stcntybr year: egen big_dep = sum(temp)
	drop temp
	label var big_dep "Deposits held by large banks"
	gen large_dep_share = big_dep/cnty_dep
	label var large_dep_share "Share of deposits held by large banks"

	*Create Large branch share by county
	gen temp = bank_br if big==1
	bysort stcntybr year: egen big_br = sum(temp)
	gen large_br_share = big_br/cnty_br
	drop temp
	
	*Create Large bank by county
	gen temp = 1 if big==1
	bysort stcntybr year: egen large_bank = sum(temp)
	gen large_bank_share = large_bank/cnty_bank
	drop temp	

	*Create Small deposit share by county
	gen temp = bank_cnty_dep if small==1
	bysort stcntybr year: egen small_dep = sum(temp)
	gen small_dep_share = small_dep/cnty_dep
	drop temp

	gen temp = small_dep_share if year==2000
	bysort stcntybr year: egen small_dep_share2000 = max(temp)
	drop temp
	
		*Creating alternate definition of small deflating threshold by GDP
		gen temp = bank_cnty_dep if smallgdp==1
		bysort stcntybr year: egen smallgdp_dep = sum(temp)
		gen smallgdp_dep_share = smallgdp_dep/cnty_dep
		drop temp
		
	*Create Small branch share by county
	gen temp = bank_br if small==1
	bysort stcntybr year: egen small_br = sum(temp)
	gen small_br_share = small_br/cnty_br
	drop temp	

	*Create Small bank by county
	gen temp = 1 if small==1
	bysort stcntybr year: egen small_bank = sum(temp)
	gen small_bank_share = small_bank/cnty_bank
	drop temp	
		
	*Create medium deposit share by county
	gen temp = bank_cnty_dep if mid==1
	bysort stcntybr year: egen med_dep = sum(temp)
	gen med_dep_share = med_dep/cnty_dep
	drop temp

	*Create medium branch share by county
	gen temp = bank_br if mid==1
	bysort stcntybr year: egen med_br = sum(temp)
	gen med_br_share = med_br/cnty_br
	drop temp	


	gen temp = ln(cnty_br) if year==2000
	bysort stcntybr year: egen lnbr2000 = max(temp)
	drop temp

	*Create medium bank by county
	gen temp = 1 if mid==1
	bysort stcntybr year: egen med_bank = sum(temp)
	gen med_bank_share = med_bank/cnty_bank
	drop temp	
	
	*Proportion of branches associated with merger etc. as a proportion of small bank deposits	
	gen merg_dep2	= mdeposit/small_dep
	label var merg_dep2 "Ratio of merged deposits to small bank county deposits"
	gen fail_dep2	= fdeposit/small_dep
	label var fail_dep2 "Ratio of failed deposits to small bank county deposits"
	gen acq_dep2	= adeposit/small_dep
	label var acq_dep2 "Ratio of acquired deposits to small bank county deposits"
	gen cl_dep2		= cdeposit/small_dep
	label var cl_dep2 "Ratio of closed deposits to small bank county deposits"

	
	*Proportion of branches associated with merger etc. as a proportion of small bank deposits	
	gen merg_br2	= mbranch/small_br
	gen fail_br2	= fbranch/small_br	
	gen acq_br2		= abranch/small_br
	gen cl_br2		= cbranch/small_br	
	drop obs

	*Create national measures
	*HHI
	bysort year: egen n_dep = sum(bank_cnty_dep)
	bysort year: egen n_br = sum(bank_br)
	bysort cert year: egen nbank_cnty_dep = sum(bank_cnty_dep)
	bysort cert year: egen nbank_br  = sum(bank_br)
	by cert year: gen obs = _n 
	replace nbank_cnty_dep = . if obs>1
	replace nbank_br  = . if obs>1
	drop obs
	gen nbank_cnty_dep_share2 = (nbank_cnty_dep/n_dep)^2
	gen nbank_br_share2 = (nbank_br/n_br)^2
	bysort year: egen nhhi_dep = sum(nbank_cnty_dep_share2)
	bysort year: egen nhhi_br = sum(nbank_br_share2)	
	*Large bank share
	gen temp = nbank_cnty_dep if big==1
	bysort year: egen nbig_dep = sum(temp)
	gen temp2 = nbig_dep/n_dep
	bysort year: egen nlarge_dep_share = max(temp2)
	drop temp temp2
	*Large branch share
	gen temp = nbank_br if big==1
	bysort year: egen nbig_br = sum(temp)
	gen temp2 = nbig_br/n_br
	bysort year: egen nlarge_br_share = max(temp2)
	drop temp temp2
	*Large bank share
	gen temp = nbank_cnty_dep if small==1
	bysort year: egen nsmall_dep = sum(temp)
	gen temp2 = nsmall_dep/n_dep
	bysort year: egen nsmall_dep_share = max(temp2)
	drop temp temp2
	*Large branch share
	gen temp = nbank_br if small==1
	bysort year: egen nsmall_br = sum(temp)
	gen temp2 = nsmall_br/n_br
	bysort year: egen nsmall_br_share = max(temp2)
	drop temp temp2
		
	bysort stcntybr year: gen obs = _n 
	keep if obs==1
	rename stcntybr geography
	keep year geography cnty_dep cnty_br hhi_dep hhi_br large_dep_share big_dep big_br large_br_share small_dep small_dep_share small_br small_br_share n_dep n_br nbank_cnty_dep nbank_br nbank_cnty_dep_share2 nbank_br_share2 nhhi_dep nhhi_br nlarge_dep_share nsmall_dep_share merg_dep merg_dep2 fail_dep fail_dep2 cl_dep cl_dep2 acq_dep acq_dep2 merg_br merg_br2 fail_br fail_br2 cl_br cl_br2 acq_br acq_br2 mHQ acqHQ small_bank small_bank_share large_bank large_bank_share mdeposit fdeposit adeposit cdeposit med_* smallgdp_dep smallgdp_dep_share small_dep_share2000 lnbr2000


	preserve
		keep year geography small_dep_share
		sort geography year
		save "$LocalData\SmallDepShare.dta", replace
	restore
	
	sort geography year
	
	xtset geography year
	foreach span in 1 3 5 15{
		gen dlarge_dep_share`span' = large_dep_share - L`span'.large_dep_share
		gen dsmall_dep_share`span' = small_dep_share - L`span'.small_dep_share
		gen dsmallgdp_dep_share`span' = smallgdp_dep_share - L`span'.smallgdp_dep_share
		gen dlarge_br_share`span' = large_br_share - L`span'.large_br_share
		gen dsmall_br_share`span' = small_br_share - L`span'.small_br_share

		gen dhhi_dep`span' = hhi_dep - L`span'.hhi_dep
		gen dhhi_br`span'  = hhi_br - L`span'.hhi_br

		gen gbig_dep`span' = log(big_dep+1) - log(L`span'.big_dep+1)
		gen gsmall_dep`span' = log(small_dep+1) - log(L`span'.small_dep+1)
		gen gsmallgdp_dep`span' = log(smallgdp_dep+1) - log(L`span'.smallgdp_dep+1)
		gen gmed_dep`span' = log(med_dep+1) - log(L`span'.med_dep+1)

		gen gbig_br`span' = log(big_br+1) - log(L`span'.big_br+1)
		gen gsmall_br`span' = log(small_br+1) - log(L`span'.small_br+1)

		gen glarge_bank`span' = log(large_bank+1) - log(L`span'.large_bank+1)
		gen gsmall_bank`span' = log(small_bank+1) - log(L`span'.small_bank+1)
	}


bysort year: egen avghhi_dep = mean(hhi_dep)
bysort year: egen avsmall_dep_share = mean(small_dep_share)
sort year
	
	
noi disp "Saving `Data'.dta..."
compress
save "$LocalData\\`Data'.dta", replace
save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace

}

* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "
